if (!require("dplyr")) install.packages("dplyr")
## Loading required package: dplyr
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
if (!require("skimr")) install.packages("skimr")
## Loading required package: skimr
if (!require("tidyr")) install.packages("tidyr")
## Loading required package: tidyr
if (!require("survival")) install.packages("survival")
## Loading required package: survival
if (!require("survminer")) install.packages("survminer")
## Loading required package: survminer
## Loading required package: ggplot2
## Loading required package: ggpubr
## 
## Attaching package: 'survminer'
## The following object is masked from 'package:survival':
## 
##     myeloma
if (!require("haven")) install.packages("haven")
## Loading required package: haven
if (!require("broom")) install.packages("broom")
## Loading required package: broom
if (!require("rms")) install.packages("rms")
## Loading required package: rms
## Loading required package: Hmisc
## 
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:dplyr':
## 
##     src, summarize
## The following objects are masked from 'package:base':
## 
##     format.pval, units
if (!require("tidyverse")) install.packages("tidyverse")
## Loading required package: tidyverse
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats   1.0.0     ✔ readr     2.1.5
## ✔ lubridate 1.9.4     ✔ stringr   1.5.1
## ✔ purrr     1.0.4     ✔ tibble    3.2.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter()    masks stats::filter()
## ✖ dplyr::lag()       masks stats::lag()
## ✖ Hmisc::src()       masks dplyr::src()
## ✖ Hmisc::summarize() masks dplyr::summarize()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
if (!require("tableone")) install.packages("tableone")
## Loading required package: tableone
library(dplyr)
library(skimr)
library(tidyr)
library(survival)
library(survminer)
library(haven)
library(broom)
library(rms)
library(tidyverse) 
library(tableone)
NHANES2 <- read.csv("NHANES2-1 (1).csv")
d <- NHANES2 #%>% 
  #select('ROWNAMES','SEX','RACE','MARRY','DEATH','AGEYRS',
                    #'GRADES','WT', 'BOOZE', 'SIZE',
         #'AVGSMK', "HEIGHT", "EXAM_YR", "DIE_YR", "LAST_YR")
#Exclude missing death 
d <- d %>%
  filter(!is.na(BOOZE), !is.na(DEATH), !is.na(SEX), !is.na(RACE), !is.na(GRADES), !is.na(MARRY), !is.na(AVGSMK), !is.na(SIZE), !is.na(GRADES))

#BMI
d <- d %>%
  mutate(BMI = WT / (HEIGHT / 100)^2)

head(d$BMI)
## [1] 20.49522 21.02151 23.22748 35.72785 27.92312 30.50132
# GRADES and SIZE categories
d$EDUC_CAT <- cut(d$GRADES,
                  breaks = c(-Inf, 8, 11, 12, 15, Inf),
                  labels = c("≤8 yrs", "Some HS", "HS Grad", "Some College", "College+"),
                  right = TRUE)

d$SIZE_CAT <- cut(d$SIZE,
                  breaks = c(0, 3, 5, 7, 8),
                  labels = c("Rural", "Small town", "Medium city", "Large city"),
                  right = TRUE)

# Catergorical BOOZE
d <- d %>%
  mutate(BOOZE_q = cut(
    BOOZE,
    breaks = c(-1, 0, 0.5, 2.0, 77.0),
    include.lowest = TRUE,
    labels = c("0/week", "0–0.5/week", "0.5–2/week", ">2/week")
  ))

vars <- c("AGEYRS", "SEX", "RACE", "MARRY", "BMI", "AVGSMK", "EDUC_CAT", "SIZE_CAT")
catVars <- c("SEX", "RACE", "MARRY")

#Table 1

table1 <- CreateTableOne(vars = vars, 
                         data = d, 
                         strata = "BOOZE_q",  
                         factorVars = catVars)

print(table1, showAllLevels = TRUE)
##                     Stratified by BOOZE_q
##                      level        0/week        0–0.5/week    0.5–2/week   
##   n                                4053           941          1729        
##   AGEYRS (mean (SD))              57.09 (12.79) 54.34 (13.36) 51.60 (13.53)
##   SEX (%)            1             1448 (35.7)    367 (39.0)    856 (49.5) 
##                      2             2605 (64.3)    574 (61.0)    873 (50.5) 
##   RACE (%)           1             3497 (86.3)    827 (87.9)   1515 (87.6) 
##                      2              475 (11.7)     93 ( 9.9)    194 (11.2) 
##                      3               81 ( 2.0)     21 ( 2.2)     20 ( 1.2) 
##   MARRY (%)          2             2885 (71.2)    683 (72.6)   1288 (74.5) 
##                      3              671 (16.6)    127 (13.5)    172 ( 9.9) 
##                      4              190 ( 4.7)     67 ( 7.1)    102 ( 5.9) 
##                      5               96 ( 2.4)     20 ( 2.1)     59 ( 3.4) 
##                      6              202 ( 5.0)     40 ( 4.3)    103 ( 6.0) 
##                      8                9 ( 0.2)      4 ( 0.4)      5 ( 0.3) 
##   BMI (mean (SD))                 26.55 (5.50)  26.42 (5.10)  26.03 (4.84) 
##   AVGSMK (mean (SD))               4.82 (10.84)  6.72 (12.63)  8.26 (13.77)
##   EDUC_CAT (%)       ≤8 yrs        1452 (35.8)    217 (23.1)    337 (19.5) 
##                      Some HS        753 (18.6)    185 (19.7)    282 (16.3) 
##                      HS Grad       1209 (29.8)    311 (33.0)    636 (36.8) 
##                      Some College   353 ( 8.7)    130 (13.8)    235 (13.6) 
##                      College+       286 ( 7.1)     98 (10.4)    239 (13.8) 
##   SIZE_CAT (%)       Rural         1101 (27.2)    348 (37.0)    758 (43.8) 
##                      Small town     454 (11.2)    123 (13.1)    244 (14.1) 
##                      Medium city    569 (14.0)    118 (12.5)    205 (11.9) 
##                      Large city    1929 (47.6)    352 (37.4)    522 (30.2) 
##                     Stratified by BOOZE_q
##                      >2/week       p      test
##   n                   2527                    
##   AGEYRS (mean (SD)) 51.71 (13.18) <0.001     
##   SEX (%)             1678 (66.4)  <0.001     
##                        849 (33.6)             
##   RACE (%)            2250 (89.0)   0.010     
##                        236 ( 9.3)             
##                         41 ( 1.6)             
##   MARRY (%)           1972 (78.0)  <0.001     
##                        170 ( 6.7)             
##                        168 ( 6.6)             
##                         70 ( 2.8)             
##                        140 ( 5.5)             
##                          7 ( 0.3)             
##   BMI (mean (SD))    25.20 (4.08)  <0.001     
##   AVGSMK (mean (SD))  9.60 (13.92) <0.001     
##   EDUC_CAT (%)         387 (15.3)  <0.001     
##                        354 (14.0)             
##                        876 (34.7)             
##                        411 (16.3)             
##                        499 (19.7)             
##   SIZE_CAT (%)        1239 (49.0)  <0.001     
##                        346 (13.7)             
##                        261 (10.3)             
##                        681 (26.9)
#Create follow-up time
d$start <- d$EXAM_YR + d$EXAM_MO / 12


d$end <- ifelse(d$DEATH == 1,
                      d$DIE_YR + d$DIE_MO / 12,
                      d$LAST_YR + d$LAST_MO / 12)

d$FU <- d$end - d$start

#Adjusted Cox regression with SEX
cox <- coxph(Surv(FU, DEATH) ~ as.factor(BOOZE_q) + SEX + 
               as.factor(RACE) + as.factor(GRADES) + as.factor(MARRY) + BMI + 
               AVGSMK + SIZE, data = d, ties='efron')
summary(cox)
## Call:
## coxph(formula = Surv(FU, DEATH) ~ as.factor(BOOZE_q) + SEX + 
##     as.factor(RACE) + as.factor(GRADES) + as.factor(MARRY) + 
##     BMI + AVGSMK + SIZE, data = d, ties = "efron")
## 
##   n= 9250, number of events= 2145 
## 
##                                   coef exp(coef)  se(coef)       z Pr(>|z|)    
## as.factor(BOOZE_q)0–0.5/week -0.131786  0.876529  0.075867  -1.737 0.082376 .  
## as.factor(BOOZE_q)0.5–2/week -0.390542  0.676690  0.065568  -5.956 2.58e-09 ***
## as.factor(BOOZE_q)>2/week    -0.333146  0.716665  0.058474  -5.697 1.22e-08 ***
## SEX                          -0.773141  0.461561  0.049172 -15.723  < 2e-16 ***
## as.factor(RACE)2             -0.265985  0.766450  0.075590  -3.519 0.000434 ***
## as.factor(RACE)3             -0.513794  0.598222  0.198879  -2.583 0.009782 ** 
## as.factor(GRADES)1            0.599987  1.822095  0.314018   1.911 0.056046 .  
## as.factor(GRADES)2            0.520093  1.682184  0.295838   1.758 0.078742 .  
## as.factor(GRADES)3            0.701697  2.017172  0.252651   2.777 0.005481 ** 
## as.factor(GRADES)4            0.668808  1.951909  0.244717   2.733 0.006276 ** 
## as.factor(GRADES)5            0.707530  2.028973  0.250421   2.825 0.004723 ** 
## as.factor(GRADES)6            0.344214  1.410881  0.236150   1.458 0.144949    
## as.factor(GRADES)7            0.391113  1.478625  0.237484   1.647 0.099578 .  
## as.factor(GRADES)8            0.524292  1.689263  0.220388   2.379 0.017362 *  
## as.factor(GRADES)9            0.393780  1.482575  0.230085   1.711 0.086997 .  
## as.factor(GRADES)10           0.177886  1.194689  0.228746   0.778 0.436771    
## as.factor(GRADES)11           0.214700  1.239490  0.235460   0.912 0.361857    
## as.factor(GRADES)12          -0.033569  0.966988  0.218656  -0.154 0.877984    
## as.factor(GRADES)13          -0.132898  0.875555  0.243908  -0.545 0.585843    
## as.factor(GRADES)14          -0.214628  0.806841  0.241954  -0.887 0.375046    
## as.factor(GRADES)15          -0.177631  0.837251  0.272086  -0.653 0.513853    
## as.factor(GRADES)16          -0.170102  0.843579  0.237427  -0.716 0.473721    
## as.factor(GRADES)17          -0.615065  0.540606  0.248169  -2.478 0.013197 *  
## as.factor(MARRY)3             0.708942  2.031840  0.062116  11.413  < 2e-16 ***
## as.factor(MARRY)4             0.065615  1.067815  0.101914   0.644 0.519686    
## as.factor(MARRY)5             0.030171  1.030631  0.144768   0.208 0.834910    
## as.factor(MARRY)6             0.156516  1.169430  0.096475   1.622 0.104728    
## as.factor(MARRY)8             0.746167  2.108901  0.336343   2.218 0.026523 *  
## BMI                          -0.013787  0.986307  0.004682  -2.945 0.003232 ** 
## AVGSMK                        0.004971  1.004984  0.001629   3.051 0.002279 ** 
## SIZE                         -0.022890  0.977370  0.008506  -2.691 0.007120 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                              exp(coef) exp(-coef) lower .95 upper .95
## as.factor(BOOZE_q)0–0.5/week    0.8765     1.1409    0.7554    1.0171
## as.factor(BOOZE_q)0.5–2/week    0.6767     1.4778    0.5951    0.7695
## as.factor(BOOZE_q)>2/week       0.7167     1.3954    0.6391    0.8037
## SEX                             0.4616     2.1666    0.4192    0.5083
## as.factor(RACE)2                0.7665     1.3047    0.6609    0.8888
## as.factor(RACE)3                0.5982     1.6716    0.4051    0.8834
## as.factor(GRADES)1              1.8221     0.5488    0.9846    3.3718
## as.factor(GRADES)2              1.6822     0.5945    0.9420    3.0039
## as.factor(GRADES)3              2.0172     0.4957    1.2294    3.3098
## as.factor(GRADES)4              1.9519     0.5123    1.2082    3.1533
## as.factor(GRADES)5              2.0290     0.4929    1.2420    3.3146
## as.factor(GRADES)6              1.4109     0.7088    0.8881    2.2413
## as.factor(GRADES)7              1.4786     0.6763    0.9283    2.3551
## as.factor(GRADES)8              1.6893     0.5920    1.0967    2.6019
## as.factor(GRADES)9              1.4826     0.6745    0.9444    2.3274
## as.factor(GRADES)10             1.1947     0.8370    0.7630    1.8705
## as.factor(GRADES)11             1.2395     0.8068    0.7813    1.9664
## as.factor(GRADES)12             0.9670     1.0341    0.6299    1.4844
## as.factor(GRADES)13             0.8756     1.1421    0.5428    1.4122
## as.factor(GRADES)14             0.8068     1.2394    0.5022    1.2964
## as.factor(GRADES)15             0.8373     1.1944    0.4912    1.4271
## as.factor(GRADES)16             0.8436     1.1854    0.5297    1.3435
## as.factor(GRADES)17             0.5406     1.8498    0.3324    0.8793
## as.factor(MARRY)3               2.0318     0.4922    1.7989    2.2949
## as.factor(MARRY)4               1.0678     0.9365    0.8745    1.3039
## as.factor(MARRY)5               1.0306     0.9703    0.7760    1.3688
## as.factor(MARRY)6               1.1694     0.8551    0.9680    1.4128
## as.factor(MARRY)8               2.1089     0.4742    1.0908    4.0771
## BMI                             0.9863     1.0139    0.9773    0.9954
## AVGSMK                          1.0050     0.9950    1.0018    1.0082
## SIZE                            0.9774     1.0232    0.9612    0.9938
## 
## Concordance= 0.661  (se = 0.006 )
## Likelihood ratio test= 651  on 31 df,   p=<2e-16
## Wald test            = 649.1  on 31 df,   p=<2e-16
## Score (logrank) test = 673.9  on 31 df,   p=<2e-16
cox.zph(cox)
##                     chisq df       p
## as.factor(BOOZE_q)  7.813  3 0.05004
## SEX                11.185  1 0.00082
## as.factor(RACE)     1.933  2 0.38039
## as.factor(GRADES)  20.679 17 0.24099
## as.factor(MARRY)    4.452  5 0.48634
## BMI                 0.173  1 0.67753
## AVGSMK              0.458  1 0.49844
## SIZE                0.264  1 0.60707
## GLOBAL             48.114 31 0.02566
plot(cox.zph(cox))

#Product term with SEX Cox regression
cox_product <- coxph(Surv(FU, DEATH) ~ as.factor(BOOZE_q)*SEX + 
                     as.factor(RACE) + as.factor(GRADES) + as.factor(MARRY) + 
                     BMI + AVGSMK + SIZE, data = d, ties = "efron")
summary(cox_product)
## Call:
## coxph(formula = Surv(FU, DEATH) ~ as.factor(BOOZE_q) * SEX + 
##     as.factor(RACE) + as.factor(GRADES) + as.factor(MARRY) + 
##     BMI + AVGSMK + SIZE, data = d, ties = "efron")
## 
##   n= 9250, number of events= 2145 
## 
##                                       coef exp(coef)  se(coef)       z Pr(>|z|)
## as.factor(BOOZE_q)0–0.5/week     -0.161405  0.850947  0.236705  -0.682 0.495313
## as.factor(BOOZE_q)0.5–2/week     -0.453902  0.635145  0.194085  -2.339 0.019352
## as.factor(BOOZE_q)>2/week        -0.567586  0.566892  0.166506  -3.409 0.000653
## SEX                              -0.817512  0.441529  0.064487 -12.677  < 2e-16
## as.factor(RACE)2                 -0.266209  0.766279  0.075599  -3.521 0.000429
## as.factor(RACE)3                 -0.511089  0.599842  0.198899  -2.570 0.010182
## as.factor(GRADES)1                0.597777  1.818073  0.314051   1.903 0.056983
## as.factor(GRADES)2                0.516572  1.676272  0.295965   1.745 0.080919
## as.factor(GRADES)3                0.705668  2.025200  0.252666   2.793 0.005224
## as.factor(GRADES)4                0.674080  1.962227  0.244765   2.754 0.005887
## as.factor(GRADES)5                0.712374  2.038825  0.250470   2.844 0.004453
## as.factor(GRADES)6                0.347922  1.416121  0.236162   1.473 0.140688
## as.factor(GRADES)7                0.394282  1.483318  0.237521   1.660 0.096917
## as.factor(GRADES)8                0.527903  1.695374  0.220403   2.395 0.016612
## as.factor(GRADES)9                0.394902  1.484239  0.230093   1.716 0.086112
## as.factor(GRADES)10               0.181665  1.199212  0.228753   0.794 0.427108
## as.factor(GRADES)11               0.217739  1.243262  0.235479   0.925 0.355141
## as.factor(GRADES)12              -0.032482  0.968040  0.218671  -0.149 0.881915
## as.factor(GRADES)13              -0.134216  0.874401  0.243941  -0.550 0.582183
## as.factor(GRADES)14              -0.216948  0.804972  0.241980  -0.897 0.369958
## as.factor(GRADES)15              -0.178067  0.836886  0.272107  -0.654 0.512854
## as.factor(GRADES)16              -0.168185  0.845198  0.237443  -0.708 0.478749
## as.factor(GRADES)17              -0.613072  0.541684  0.248185  -2.470 0.013503
## as.factor(MARRY)3                 0.713826  2.041788  0.062203  11.476  < 2e-16
## as.factor(MARRY)4                 0.063219  1.065260  0.101959   0.620 0.535231
## as.factor(MARRY)5                 0.032512  1.033046  0.144813   0.225 0.822363
## as.factor(MARRY)6                 0.158054  1.171229  0.096484   1.638 0.101392
## as.factor(MARRY)8                 0.747464  2.111638  0.336519   2.221 0.026340
## BMI                              -0.013141  0.986945  0.004706  -2.793 0.005227
## AVGSMK                            0.004964  1.004976  0.001629   3.046 0.002316
## SIZE                             -0.022786  0.977472  0.008512  -2.677 0.007428
## as.factor(BOOZE_q)0–0.5/week:SEX  0.019546  1.019738  0.150620   0.130 0.896748
## as.factor(BOOZE_q)0.5–2/week:SEX  0.042411  1.043323  0.131314   0.323 0.746717
## as.factor(BOOZE_q)>2/week:SEX     0.183497  1.201412  0.120545   1.522 0.127949
##                                     
## as.factor(BOOZE_q)0–0.5/week        
## as.factor(BOOZE_q)0.5–2/week     *  
## as.factor(BOOZE_q)>2/week        ***
## SEX                              ***
## as.factor(RACE)2                 ***
## as.factor(RACE)3                 *  
## as.factor(GRADES)1               .  
## as.factor(GRADES)2               .  
## as.factor(GRADES)3               ** 
## as.factor(GRADES)4               ** 
## as.factor(GRADES)5               ** 
## as.factor(GRADES)6                  
## as.factor(GRADES)7               .  
## as.factor(GRADES)8               *  
## as.factor(GRADES)9               .  
## as.factor(GRADES)10                 
## as.factor(GRADES)11                 
## as.factor(GRADES)12                 
## as.factor(GRADES)13                 
## as.factor(GRADES)14                 
## as.factor(GRADES)15                 
## as.factor(GRADES)16                 
## as.factor(GRADES)17              *  
## as.factor(MARRY)3                ***
## as.factor(MARRY)4                   
## as.factor(MARRY)5                   
## as.factor(MARRY)6                   
## as.factor(MARRY)8                *  
## BMI                              ** 
## AVGSMK                           ** 
## SIZE                             ** 
## as.factor(BOOZE_q)0–0.5/week:SEX    
## as.factor(BOOZE_q)0.5–2/week:SEX    
## as.factor(BOOZE_q)>2/week:SEX       
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                                  exp(coef) exp(-coef) lower .95 upper .95
## as.factor(BOOZE_q)0–0.5/week        0.8509     1.1752    0.5351    1.3533
## as.factor(BOOZE_q)0.5–2/week        0.6351     1.5744    0.4342    0.9291
## as.factor(BOOZE_q)>2/week           0.5669     1.7640    0.4090    0.7857
## SEX                                 0.4415     2.2649    0.3891    0.5010
## as.factor(RACE)2                    0.7663     1.3050    0.6607    0.8887
## as.factor(RACE)3                    0.5998     1.6671    0.4062    0.8858
## as.factor(GRADES)1                  1.8181     0.5500    0.9824    3.3646
## as.factor(GRADES)2                  1.6763     0.5966    0.9385    2.9941
## as.factor(GRADES)3                  2.0252     0.4938    1.2342    3.3231
## as.factor(GRADES)4                  1.9622     0.5096    1.2145    3.1703
## as.factor(GRADES)5                  2.0388     0.4905    1.2479    3.3310
## as.factor(GRADES)6                  1.4161     0.7062    0.8914    2.2497
## as.factor(GRADES)7                  1.4833     0.6742    0.9312    2.3627
## as.factor(GRADES)8                  1.6954     0.5898    1.1007    2.6114
## as.factor(GRADES)9                  1.4842     0.6737    0.9455    2.3300
## as.factor(GRADES)10                 1.1992     0.8339    0.7659    1.8776
## as.factor(GRADES)11                 1.2433     0.8043    0.7837    1.9724
## as.factor(GRADES)12                 0.9680     1.0330    0.6306    1.4860
## as.factor(GRADES)13                 0.8744     1.1436    0.5421    1.4104
## as.factor(GRADES)14                 0.8050     1.2423    0.5010    1.2935
## as.factor(GRADES)15                 0.8369     1.1949    0.4910    1.4265
## as.factor(GRADES)16                 0.8452     1.1832    0.5307    1.3461
## as.factor(GRADES)17                 0.5417     1.8461    0.3330    0.8811
## as.factor(MARRY)3                   2.0418     0.4898    1.8074    2.3065
## as.factor(MARRY)4                   1.0653     0.9387    0.8723    1.3009
## as.factor(MARRY)5                   1.0330     0.9680    0.7778    1.3721
## as.factor(MARRY)6                   1.1712     0.8538    0.9694    1.4150
## as.factor(MARRY)8                   2.1116     0.4736    1.0919    4.0838
## BMI                                 0.9869     1.0132    0.9779    0.9961
## AVGSMK                              1.0050     0.9950    1.0018    1.0082
## SIZE                                0.9775     1.0230    0.9613    0.9939
## as.factor(BOOZE_q)0–0.5/week:SEX    1.0197     0.9806    0.7591    1.3699
## as.factor(BOOZE_q)0.5–2/week:SEX    1.0433     0.9585    0.8066    1.3496
## as.factor(BOOZE_q)>2/week:SEX       1.2014     0.8324    0.9486    1.5216
## 
## Concordance= 0.661  (se = 0.006 )
## Likelihood ratio test= 653.3  on 34 df,   p=<2e-16
## Wald test            = 658.5  on 34 df,   p=<2e-16
## Score (logrank) test = 694.2  on 34 df,   p=<2e-16
cox.zph(cox_product)
##                         chisq df       p
## as.factor(BOOZE_q)      7.669  3 0.05337
## SEX                    11.045  1 0.00089
## as.factor(RACE)         1.948  2 0.37762
## as.factor(GRADES)      20.607 17 0.24437
## as.factor(MARRY)        4.475  5 0.48325
## BMI                     0.172  1 0.67826
## AVGSMK                  0.472  1 0.49207
## SIZE                    0.253  1 0.61527
## as.factor(BOOZE_q):SEX 12.584  3 0.00563
## GLOBAL                 47.959 34 0.05671
plot(cox.zph(cox_product))